library(Seurat)
library(princurve)
library(Revelio)
library(monocle)
library(gprofiler2)
library(seriation)
library(Matrix)
library(dplyr)
library(RColorBrewer)
library(ggplot2)
library(ggExtra)
library(cowplot)
library(wesanderson)
#Set ggplot theme as classic
theme_set(theme_classic())WT.KO.integrated <- readRDS("WT_KO.integrated.RDS")DimPlot(WT.KO.integrated,
reduction = "integrated_spring",
group.by = "Lineage",
pt.size = 1,
cols = c("#cc391b","#e7823a","#969696","#026c9a")
) + NoAxes()Neurons.data <- subset(WT.KO.integrated,
subset = Lineage %in% c("Cajal-Retzius_neurons", "Pallial_neurons") & orig.ident == "Hem1" & Cell.ident.WT %in% c("Cajal-Retzius_neurons", "Pallial_neurons"))
DimPlot(Neurons.data,
group.by = "Lineage",
reduction = "integrated_spring",
pt.size = 1,
cols = c("#cc391b","#026c9a")
) + NoAxes()rm(WT.KO.integrated)
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3373039 180.2 6126737 327.3 5658538 302.2
## Vcells 85671957 653.7 997791514 7612.6 1064969141 8125.1
Trajectories.Hem <- Neurons.data@meta.data %>%
select("Barcodes", "nCount_RNA", "Spring_1", "Spring_2", "AP_signature","BP_signature", "EN_signature", "LN_signature", "Lineage") %>%
filter(Lineage == "Cajal-Retzius_neurons")fit <- principal_curve(as.matrix(Trajectories.Hem[,c("Spring_1", "Spring_2")]),
smoother='lowess',
trace=TRUE,
f = .7,
stretch=0)## Starting curve---distance^2: 11526697569
## Iteration 1---distance^2: 13397620
## Iteration 2---distance^2: 13420664
## Iteration 3---distance^2: 13422248
#The principal curve smoothed
Hem.pc.line <- as.data.frame(fit$s[order(fit$lambda),])
#Pseudotime score
Trajectories.Hem$PseudotimeScore <- fit$lambda/max(fit$lambda)if (cor(Trajectories.Hem$PseudotimeScore, Neurons.data@assays$RNA@data['Hmga2', Trajectories.Hem$Barcodes]) > 0) {
Trajectories.Hem$PseudotimeScore <- -(Trajectories.Hem$PseudotimeScore - max(Trajectories.Hem$PseudotimeScore))
}Trajectories.Pallial <- Neurons.data@meta.data %>%
select("Barcodes", "nCount_RNA", "Spring_1", "Spring_2", "AP_signature","BP_signature", "EN_signature", "LN_signature", "Lineage") %>%
filter(Lineage == "Pallial_neurons")fit <- principal_curve(as.matrix(Trajectories.Pallial[,c("Spring_1", "Spring_2")]),
smoother='lowess',
trace=TRUE,
f = .7,
stretch=0)## Starting curve---distance^2: 6447222830
## Iteration 1---distance^2: 10179991
## Iteration 2---distance^2: 10187204
#The principal curve smoothed
Pallial.pc.line <- as.data.frame(fit$s[order(fit$lambda),])
#Pseudotime score
Trajectories.Pallial$PseudotimeScore <- fit$lambda/max(fit$lambda)if (cor(Trajectories.Pallial$PseudotimeScore, Neurons.data@assays$RNA@data['Hmga2', Trajectories.Pallial$Barcodes]) > 0) {
Trajectories.Pallial$PseudotimeScore <- -(Trajectories.Pallial$PseudotimeScore - max(Trajectories.Pallial$PseudotimeScore))
}Trajectories.neurons <- rbind(Trajectories.Pallial, Trajectories.Hem)cols <- brewer.pal(n =11, name = "Spectral")
ggplot(Trajectories.neurons, aes(Spring_1, Spring_2)) +
geom_point(aes(color=PseudotimeScore), size=2, shape=16) +
scale_color_gradientn(colours=rev(cols), name='Speudotime score') +
geom_line(data=Pallial.pc.line, color="#026c9a", size=0.77) +
geom_line(data=Hem.pc.line, color="#cc391b", size=0.77)Since we observe the first 25% of both trajectories are occupied by few, likely progenitor cells, we shift this cell along the axis
Pseudotime.intervals <- Trajectories.neurons%>%
select(Lineage, PseudotimeScore) %>%
mutate(Pseudotime.bins = cut(Trajectories.neurons$PseudotimeScore, seq(0, max(Trajectories.neurons$PseudotimeScore) + 0.05, 0.05), dig.lab = 2, right = FALSE)) %>%
group_by(Lineage, Pseudotime.bins) %>%
summarise(n=n())
ggplot(Pseudotime.intervals, aes(x=Pseudotime.bins, y=n, fill=Lineage)) +
geom_bar(stat = "identity", width = 0.90) +
theme(axis.text.x = element_text(angle = 45, hjust=1))+
scale_fill_manual(values= c("#cc391b", "#026c9a"))score <- sapply(Trajectories.neurons$PseudotimeScore,
FUN = function(x) if (x <= 0.2) {x= 0.2} else { x=x })
Trajectories.neurons$PseudotimeScore.shifted <- (score - min(score)) / (max(score) - min(score))ggplot(Trajectories.neurons, aes(x= PseudotimeScore.shifted, y= nCount_RNA/10000)) +
geom_point(aes(color= Lineage), size=0.5) +
scale_color_manual(values= c("#cc391b", "#026c9a")) +
geom_smooth(method="loess", n= 50, aes(color= Lineage)) +
ylim(0,NA)rm(list = ls()[!ls() %in% c("Trajectories.neurons")])Progenitors.data <- readRDS("../ProgenitorsDiversity/Progenitors.RDS")table(Progenitors.data$Cell_ident)##
## Dorso-Medial_pallium Hem Medial_pallium
## 3451 1954 2719
To balance the number of progenitors in both domain we will only work with Hem and Medial_pallium annotated cells. Since we are using pallial cell to contrast CR specific trajectory we think this approximation will not significantly affect our analysis.
Progenitors.data <- subset(Progenitors.data, subset = Cell_ident %in% c("Hem", "Medial_pallium") & orig.ident == "Hem1")p1 <- DimPlot(Progenitors.data,
reduction = "spring",
pt.size = 0.5,
cols = c("#e3c148", "#e46b6b")) + NoAxes()
p2 <- FeaturePlot(object = Progenitors.data,
features = "Revelio.cc",
pt.size = 0.5,
cols = rev(brewer.pal(10,"Spectral")),
reduction = "spring",
order = T) & NoAxes()
p3 <- DimPlot(object = Progenitors.data,
group.by = "Revelio.phase",
pt.size = 0.5,
reduction = "spring",
cols = c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) & NoAxes()
p1 + p2 + p3 + patchwork::plot_layout(ncol = 2)# Start with neurons data
Trajectories.all <- Trajectories.neurons %>% select(Barcodes, nCount_RNA, Spring_1, Spring_2, Lineage)
Trajectories.all$Pseudotime <- Trajectories.neurons$PseudotimeScore.shifted + 0.5
Trajectories.all$Phase <- NA# Add progenitors data
Trajectories.progenitors <- Progenitors.data@meta.data %>%
select(Barcodes, nCount_RNA, Spring_1, Spring_2) %>%
mutate(Lineage= ifelse(Progenitors.data$Cell_ident == "Medial_pallium", "Pallial_neurons", "Cajal-Retzius_neurons") ,
Pseudotime= Progenitors.data$Revelio.cc/2,
Phase = Progenitors.data$Revelio.phase)Trajectories.WT<- rbind(Trajectories.all, Trajectories.progenitors)
Trajectories.WT$Phase <- factor(Trajectories.WT$Phase, levels = c("G1.S", "S", "G2", "G2.M", "M.G1"))p1 <- ggplot(Trajectories.WT, aes(Spring_1, Spring_2)) +
geom_point(aes(color=Pseudotime), size=0.5) +
scale_color_gradientn(colours=rev(brewer.pal(n =11, name = "Spectral")), name='Pseudotime score')
p2 <- ggplot(Trajectories.WT, aes(Spring_1, Spring_2)) +
geom_point(aes(color= Lineage), size=0.5) +
scale_color_manual(values= c("#cc391b", "#026c9a"))
p1 + p2p1 <- ggplot(Trajectories.WT, aes(x= Pseudotime, y= nCount_RNA/10000)) +
geom_point(aes(color= Phase), size=0.5) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) +
geom_smooth(method="loess", n= 50, fill="grey") +
ylim(0,NA)
p2 <- ggplot(Trajectories.WT, aes(x= Pseudotime, y= nCount_RNA/10000)) +
geom_point(aes(color= Lineage), size=0.5) +
scale_color_manual(values= c("#cc391b", "#026c9a")) +
geom_smooth(method="loess", n= 50, fill="grey") +
ylim(0,NA)
p1 / p2rm(list = ls()[!ls() %in% c("Trajectories.WT")])
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3459607 184.8 6126737 327.3 6126737 327.3
## Vcells 6013142 45.9 798233212 6090.1 1064969141 8125.1
WT.KO.integrated <- readRDS("WT_KO.integrated.RDS")Neurons.data <- subset(WT.KO.integrated,
subset = Lineage %in% c("Cajal-Retzius_neurons", "Pallial_neurons") & orig.ident == "Gmnc_KO" & Cell.ident.KO %in% c("Neuron_prob.2", "Neuron_prob.3"))
DimPlot(Neurons.data,
group.by = "Lineage",
reduction = "integrated_spring",
pt.size = 1,
cols = c("#cc391b","#026c9a")
) + NoAxes()fit <- principal_curve(as.matrix(Neurons.data@meta.data[,c("Spring_1", "Spring_2")]),
smoother='lowess',
trace=TRUE,
f = 1,
stretch=0)## Starting curve---distance^2: 76242448164
## Iteration 1---distance^2: 48308567
## Iteration 2---distance^2: 49237931
## Iteration 3---distance^2: 50119026
## Iteration 4---distance^2: 51105768
## Iteration 5---distance^2: 51819999
## Iteration 6---distance^2: 52369574
## Iteration 7---distance^2: 52732675
## Iteration 8---distance^2: 52936995
## Iteration 9---distance^2: 53060989
## Iteration 10---distance^2: 53117718
#Pseudotime score
PseudotimeScore <- fit$lambda/max(fit$lambda)
if (cor(PseudotimeScore, Neurons.data@assays$RNA@data['Hmga2', ]) > 0) {
Neurons.data$PseudotimeScore <- -(PseudotimeScore - max(PseudotimeScore))
}
cols <- brewer.pal(n =11, name = "Spectral")
ggplot(Neurons.data@meta.data, aes(Spring_1, Spring_2)) +
geom_point(aes(color=PseudotimeScore), size=2, shape=16) +
scale_color_gradientn(colours=rev(cols), name='Pseudotime score')score <- sapply(Neurons.data$PseudotimeScore,
FUN = function(x) if (x <= 0.25) {x= 0.25} else { x=x })
Neurons.data$PseudotimeScore.shifted <- (score - min(score)) / (max(score) - min(score))ggplot(Neurons.data@meta.data, aes(x= PseudotimeScore.shifted, y= nCount_RNA/10000)) +
geom_point(aes(color= Lineage), size=0.5) +
scale_color_manual(values= c("#cc391b", "#026c9a")) +
geom_smooth(method="loess", n= 50, aes(color= Lineage)) +
ylim(0,NA)To balance the number of progenitors in both domain we will only work with Hem and Medial_pallium annotated cells. Since we are using pallial cell to contrast CR specific trajectory we think this approximation will not significantly affect our analysis.
Progenitors.data <- subset(WT.KO.integrated,
subset = Lineage %in% c("Cajal-Retzius_neurons", "Pallial_neurons") & orig.ident == "Gmnc_KO" & Cell.ident.KO %in% c("Hem", "Medial_pallium"))
DimPlot(Progenitors.data,
reduction = "integrated_spring",
group.by = "Cell.ident.KO",
pt.size = 1,
cols = c("#cc391b","#026c9a")
) + NoAxes()rm(WT.KO.integrated)
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3507624 187.4 6126737 327.3 6126737 327.3
## Vcells 298001985 2273.6 1144872590 8734.7 1331671700 10159.9
rawCounts <- as.matrix(Progenitors.data[["RNA"]]@counts)
# Filter genes expressed by less than 10 cells
num.cells <- Matrix::rowSums(rawCounts > 0)
genes.use <- names(x = num.cells[which(x = num.cells >= 10)])
rawCounts <- rawCounts[genes.use, ]CCgenes <- read.table("../ChoroidPlexus_trajectory/CCgenes.csv", sep = ";", header = T)We can now follow the tutorial form the package github page
myData <- createRevelioObject(rawData = rawCounts,
cyclicGenes = CCgenes,
lowernGeneCutoff = 0,
uppernUMICutoff = Inf,
ccPhaseAssignBasedOnIndividualBatches = F)## 2022-06-13 15:19:40: reading data: 5.59secs
rm("rawCounts")
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3503173 187.1 6126737 327.3 6126737 327.3
## Vcells 357827865 2730.1 1144872590 8734.7 1331671700 10159.9
The getCellCyclePhaseAssignInformation filter “outlier” cells for cell cycle phase assignation. We modified the function to keep all cells as we observed this does not affect the global cell cycle fitting procedure
source("../Functions/functions_InitializationCCPhaseAssignFiltering.R")
myData <- getCellCyclePhaseAssign_allcells(myData)## 2022-06-13 15:19:55: assigning cell cycle phases: 32.85secs
myData <- getPCAData(dataList = myData)## 2022-06-13 15:20:45: calculating PCA: 25.35secs
myData <- getOptimalRotation(dataList = myData)## 2022-06-13 15:21:10: calculating optimal rotation: 14.21secs
CellCycledata <- cbind(as.data.frame(t(myData@transformedData$dc$data[1:2,])),
nUMI= myData@cellInfo$nUMI,
Revelio.phase = factor(myData@cellInfo$ccPhase, levels = c("G1.S", "S", "G2", "G2.M", "M.G1")),
Revelio.cc= myData@cellInfo$ccPercentageUniformlySpaced,
Domain= Progenitors.data$Cell.ident.KO)p1 <- ggplot(CellCycledata, aes(DC1, DC2)) +
geom_point(aes(color = Revelio.phase), size= 0.5) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5]))
p2 <- ggplot(CellCycledata, aes(DC1, DC2)) +
geom_point(aes(color = Domain), size = 0.5) +
scale_color_manual(values= c("#cc391b","#026c9a"))
p3 <- ggplot(CellCycledata, aes(DC1, DC2)) +
geom_point(aes(color=Revelio.cc), size=0.5, shape=16) +
scale_color_gradientn(colours=rev(colorRampPalette(brewer.pal(n =11, name = "Spectral"))(100)),
name='Revelio_cc')
p4 <- ggplot(CellCycledata, aes(x= Revelio.cc, y= nUMI/10000)) +
geom_point(aes(color= Revelio.phase), size=0.5) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) +
geom_smooth(method="loess", n= 50, fill="grey") +
ylim(0,NA)
(p1 + p2) /(p3 + p4)Progenitors.data$Revelio.phase <- CellCycledata$Revelio.phase
Progenitors.data$Revelio.cc <- CellCycledata$Revelio.cc
p1 <- FeaturePlot(object = Progenitors.data,
features = "Revelio.cc",
pt.size = 1,
cols = rev(brewer.pal(10,"Spectral")),
reduction = "integrated_spring",
order = T) & NoAxes()
p2 <- DimPlot(object = Progenitors.data,
group.by = "Revelio.phase",
pt.size = 1,
reduction = "integrated_spring",
cols = c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) & NoAxes()
p1 / p2Progenitors <- Progenitors.data@meta.data %>% select(Barcodes, nCount_RNA, Spring_1, Spring_2, Lineage)
Progenitors$PseudotimeScore <- CellCycledata$Revelio.cc
Progenitors$nUMI <- Progenitors.data$nCount_RNA# Start with neurons data
Trajectories.all <- Neurons.data@meta.data %>% select(Barcodes, nCount_RNA, Spring_1, Spring_2, Lineage)
Trajectories.all$Pseudotime <- Neurons.data$PseudotimeScore.shifted + 0.5
Trajectories.all$Phase <- NA# Add progenitors data
Trajectories.progenitors <- Progenitors %>%
select(Barcodes, nCount_RNA, Spring_1, Spring_2, Lineage) %>%
mutate(Pseudotime= Progenitors.data$Revelio.cc/2,
Phase = Progenitors.data$Revelio.phase)Trajectories.KO <- rbind(Trajectories.all, Trajectories.progenitors)
Trajectories.KO$Phase <- factor(Trajectories.KO$Phase, levels = c("G1.S", "S", "G2", "G2.M", "M.G1"))p1 <- ggplot(Trajectories.KO, aes(Spring_1, Spring_2)) +
geom_point(aes(color=Pseudotime), size=0.5) +
scale_color_gradientn(colours=rev(brewer.pal(n =11, name = "Spectral")), name='Pseudotime score')
p2 <- ggplot(Trajectories.KO, aes(Spring_1, Spring_2)) +
geom_point(aes(color= Lineage), size=0.5) +
scale_color_manual(values= c("#cc391b", "#026c9a"))
p1 + p2p1 <- ggplot(Trajectories.KO, aes(x= Pseudotime, y= nCount_RNA/10000)) +
geom_point(aes(color= Phase), size=0.5) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) +
geom_smooth(method="loess", n= 50, fill="grey") +
ylim(0,NA)
p2 <- ggplot(Trajectories.KO, aes(x= Pseudotime, y= nCount_RNA/10000)) +
geom_point(aes(color= Lineage), size=0.5) +
scale_color_manual(values= c("#cc391b", "#026c9a")) +
geom_smooth(aes(color= Lineage), method="loess", n= 50, fill="grey") +
ylim(0,NA)
p1 / p2Trajectories <- rbind(Trajectories.KO, Trajectories.WT)
rm(list = ls()[!ls() %in% c("Trajectories")])WT.KO.integrated <- readRDS("WT_KO.integrated.RDS")
meta.data <- WT.KO.integrated@meta.data[Trajectories$Barcodes,]
meta.data$Pseudotime <- Trajectories$Pseudotime
meta.data$Phase <- Trajectories$PhaseWT.KO.integrated <- CreateSeuratObject(counts = WT.KO.integrated@assays$RNA@counts[, Trajectories$Barcodes],
meta.data = meta.data)
spring <- as.matrix(WT.KO.integrated@meta.data %>% select("Integrated_Spring_1", "Integrated_Spring_2"))
WT.KO.integrated[["integrated_spring"]] <- CreateDimReducObject(embeddings = spring, key = "Spring_", assay = DefaultAssay(WT.KO.integrated))p1 <- FeaturePlot(object = WT.KO.integrated,
features = "Pseudotime",
pt.size = 0.5,
cols = rev(colorRampPalette(brewer.pal(n =11, name = "Spectral"))(100)),
reduction = "integrated_spring",
order = T) & NoAxes()
p2 <- DimPlot(object = WT.KO.integrated,
group.by = "Lineage",
pt.size = 0.5,
reduction = "integrated_spring",
cols = c("#cc391b", "#026c9a")) & NoAxes()
p3 <- DimPlot(object = WT.KO.integrated,
group.by = "Phase",
pt.size = 0.5,
reduction = "integrated_spring",
cols = c(wes_palette("FantasticFox1")[1:3],"grey40",wes_palette("FantasticFox1")[5])) & NoAxes()
p1 + p2 + p3WT.KO.integrated <- NormalizeData(WT.KO.integrated, normalization.method = "LogNormalize", scale.factor = 10000, assay = "RNA")WT.KO.integrated <- FindVariableFeatures(WT.KO.integrated, selection.method = "disp", nfeatures = 6500, assay = "RNA")source("../Functions/functions_GeneTrends.R")
Plot.Genes.trend(Seurat.data= WT.KO.integrated,
group.by = "Genotype",
genes= c("Gas1","Sox2",
"Neurog2", "Btg2",
"Tbr1", "Mapt",
"Trp73", "Foxg1"))Plot.Genes.trend(Seurat.data= WT.KO.integrated,
group.by = "Genotype",
genes= c("Gmnc", "Mcidas",
"Ccno", "Ccdc67",
"Foxj1", "Trp73",
"Lhx1", "Cdkn1a"))CR <- WT.KO.integrated@meta.data %>% filter(Lineage == "Cajal-Retzius_neurons") %>% select(Barcodes,Pseudotime,orig.ident)
CR.genes <- as.data.frame(t(WT.KO.integrated@assays$RNA@data[c("Sox2","Reln", "Nhlh2", "Ebf3"),CR$Barcodes]))
CR.genes$Pseudotime <- CR$Pseudotime
CR.genes$Genotype <- factor(CR$orig.ident, levels = c("Hem1", "Gmnc_KO") )
CR.genes <- reshape2::melt(CR.genes, id = c("Pseudotime", "Genotype"))
ggplot(CR.genes, aes(x= Pseudotime, y= value)) +
geom_smooth(method="loess", n= 50, aes(color= variable, linetype= Genotype)) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],wes_palette("FantasticFox1")[5])) +
ylim(0,NA)rm(list = ls()[!ls() %in% c("WT.KO.integrated")])
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3556199 190.0 6126737 327.3 6126737 327.3
## Vcells 106929041 815.9 732718458 5590.2 1331671700 10159.9
# Transfer metadata
Annot.data <- new('AnnotatedDataFrame',
data = data.frame(Barcode= WT.KO.integrated$Barcodes,
Lineage= WT.KO.integrated$Lineage,
Pseudotime= WT.KO.integrated$Pseudotime,
Phase= WT.KO.integrated$Phase,
Genotype= WT.KO.integrated$orig.ident))
# Transfer counts data
var.genes <- WT.KO.integrated[["RNA"]]@var.features
feature.data <- new('AnnotatedDataFrame',
data = data.frame(gene_short_name = rownames(WT.KO.integrated[["RNA"]]@counts[var.genes,]),
row.names = rownames(WT.KO.integrated[["RNA"]]@counts[var.genes,])))
# Create the CellDataSet object including variable genes only
gbm_cds <- newCellDataSet(WT.KO.integrated[["RNA"]]@counts[var.genes,],
phenoData = Annot.data,
featureData = feature.data,
lowerDetectionLimit = 0,
expressionFamily = negbinomial())gbm_cds <- estimateSizeFactors(gbm_cds)
gbm_cds <- estimateDispersions(gbm_cds)
gbm_cds <- detectGenes(gbm_cds, min_expr = 0.1)rm(list = ls()[!ls() %in% c("WT.KO.integrated", "gbm_cds")])
gc()## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3603400 192.5 6126737 327.3 6126737 327.3
## Vcells 129493305 988.0 586174767 4472.2 1331671700 10159.9
# Load WT CR variable genes along pseudotime
WT.CR.genes <- read.table("../CajalRetzius_trajectory/CR_dynamic_genes.csv", sep = ";", header = T)
# Extract CR lineage in both genotypes
CR.pData <- pData(gbm_cds) %>% subset(Lineage == "Cajal-Retzius_neurons")
# Create a new pseudotime vector of 300 points
nPoints <- 300
new_data = list()
for (Genotype in unique(CR.pData$Genotype)){
new_data[[length(new_data) + 1]] = data.frame(Pseudotime = seq(min(CR.pData$Pseudotime), max(CR.pData$Pseudotime), length.out = nPoints), Genotype= Genotype)
}
new_data = do.call(rbind, new_data)
# Smooth gene expression
Diff.curve_matrix <- genSmoothCurves(gbm_cds[WT.CR.genes$Gene, CR.pData$Barcode],
trend_formula = "~sm.ns(Pseudotime, df = 3)*Genotype",
relative_expr = TRUE,
new_data = new_data,
cores= parallel::detectCores() - 2)Pseudotime.genes.clusters <- cluster::pam(as.dist((1 - cor(Matrix::t(Diff.curve_matrix[,301:600]),method = "pearson"))), k= 5)
CR.Gene.dynamique <- data.frame(Gene= names(Pseudotime.genes.clusters$clustering),
Waves= Pseudotime.genes.clusters$clustering,
Gene.Clusters = Pseudotime.genes.clusters$clustering) %>% arrange(Gene.Clusters)
row.names(CR.Gene.dynamique) <- CR.Gene.dynamique$Gene
CR.Gene.dynamique$Gene.Clusters <- paste0("Clust.", CR.Gene.dynamique$Gene.Clusters)# Order the rows using seriation
dst <- as.dist((1-cor(scale(t(Diff.curve_matrix[,301:600])), method = "pearson")))
row.ser <- seriation::seriate(dst, method ="R2E") #"R2E" #TSP #"GW" "GW_ward"
gene.order <- rownames(Diff.curve_matrix[,301:600][get_order(row.ser),])
# Set annotation colors
pal <- wes_palette("Darjeeling1")
anno.colors <- list(lineage = c(Gmnc_KO="#026c9a", Gmnc_WT="#cc391b"),
Gene.Clusters = c(Clust.1 =pal[1] , Clust.2=pal[2], Clust.3=pal[3], Clust.4=pal[4], Clust.5=pal[5]))
pheatmap::pheatmap(Diff.curve_matrix[gene.order,
c(300:1,#KO
301:600)], #WT
scale = "row",
cluster_rows = F,
cluster_cols = F,
annotation_row = CR.Gene.dynamique %>% dplyr::select(Gene.Clusters),
annotation_col = data.frame(lineage = rep(c("Gmnc_KO","Gmnc_WT"), each=300)),
annotation_colors = anno.colors,
show_colnames = F,
show_rownames = F,
fontsize_row = 8,
color = viridis::viridis(9),
breaks = seq(-2.5,2.5, length.out = 9),
main = "")gene.order <- gene.order[c(382:622,1:381)]
pheatmap::pheatmap(Diff.curve_matrix[gene.order,
c(300:1,#Pal
301:600)], #CR
scale = "row",
cluster_rows = F,
cluster_cols = F,
annotation_row = CR.Gene.dynamique %>% dplyr::select(Gene.Clusters),
annotation_col = data.frame(lineage = rep(c("Gmnc_KO","Gmnc_WT"), each=300)),
annotation_colors = anno.colors,
show_colnames = F,
gaps_col = c(200,400),
show_rownames = F,
fontsize_row = 8,
color = viridis::viridis(9),
breaks = seq(-2.5,2.5, length.out = 9),
main = "WT CR enriched genes along GmncKO and GmncWT CR trajectories")anno.colors <- list(Cell.state = c(Cycling_RG="#046c9a", Differentiating_cells="#ebcb2e"),
Gene.Clusters = c(Clust.1 =pal[1] , Clust.2=pal[2], Clust.3=pal[3], Clust.4=pal[4], Clust.5=pal[5]))
col.anno <- data.frame(Cell.state = rep(c("Cycling_RG","Differentiating_cells"), c(100,200)))
rownames(col.anno) <- 301:600
pheatmap::pheatmap(Diff.curve_matrix[gene.order,
c(301:600)], #CR
scale = "row",
cluster_rows = F,
cluster_cols = F,
annotation_row = CR.Gene.dynamique %>% dplyr::select(Gene.Clusters),
annotation_col = col.anno,
annotation_colors = anno.colors,
gaps_col = 100,
show_colnames = F,
show_rownames = F,
fontsize_row = 8,
color = viridis::viridis(9),
breaks = seq(-2.5,2.5, length.out = 9),
main = "WT CR enriched genes along GmncKO and GmncWT CR trajectories")anno.colors <- list(Cell.state = c(Cycling_RG="#046c9a", Differentiating_cells="#ebcb2e"),
Gene.Clusters = c(Clust.1 =pal[1] , Clust.2=pal[2], Clust.3=pal[3], Clust.4=pal[4], Clust.5=pal[5]))
pheatmap::pheatmap(Diff.curve_matrix[gene.order,
c(1:300)], #CR
scale = "row",
cluster_rows = F,
cluster_cols = F,
annotation_row = CR.Gene.dynamique %>% dplyr::select(Gene.Clusters),
annotation_col = data.frame(Cell.state = rep(c("Cycling_RG","Differentiating_cells"), c(100,200))),
annotation_colors = anno.colors,
show_colnames = F,
show_rownames = F,
gaps_col = 100,
fontsize_row = 8,
color = viridis::viridis(9),
breaks = seq(-2.5,2.5, length.out = 9),
main = "WT CR enriched genes along GmncKO CR trajectories")CR <- WT.KO.integrated@meta.data %>% filter(Lineage == "Cajal-Retzius_neurons") %>% select(Barcodes,Pseudotime,orig.ident)
CR.genes <- as.data.frame(t(WT.KO.integrated@assays$RNA@data[c("Gmnc","Trp73", "Lhx1", "Barhl2"),CR$Barcodes]))
CR.genes$Pseudotime <- CR$Pseudotime
CR.genes$Genotype <- factor(CR$orig.ident, levels = c("Hem1", "Gmnc_KO") )
CR.genes <- reshape2::melt(CR.genes, id = c("Pseudotime", "Genotype"))
ggplot(CR.genes, aes(x= Pseudotime, y= value)) +
geom_smooth(method="loess", n= 50, aes(color= variable, linetype= Genotype)) +
scale_color_manual(values= c(wes_palette("FantasticFox1")[1:3],wes_palette("FantasticFox1")[5])) +
ylim(0,NA) # DEG genes in the KO
KO.pData <- pData(gbm_cds) %>% subset(Genotype == "Gmnc_KO")
pseudo.maturation.diff <- differentialGeneTest(gbm_cds[fData(gbm_cds)$num_cells_expressed > 80, KO.pData$Barcode],
fullModelFormulaStr = "~sm.ns(Pseudotime, df = 3)*Lineage",
reducedModelFormulaStr = "~sm.ns(Pseudotime, df = 3)",
cores = parallel::detectCores() - 2)
# Filter genes based on FDR
pseudo.maturation.diff.filtered <- pseudo.maturation.diff %>% filter(qval < 1e-40)# Create a new pseudo-DV vector of 300 points
nPoints <- 300
new_data = list()
for (Lineage in unique(KO.pData$Lineage)){
new_data[[length(new_data) + 1]] = data.frame(Pseudotime = seq(min(KO.pData$Pseudotime), max(KO.pData$Pseudotime), length.out = nPoints), Lineage=Lineage)
}
new_data = do.call(rbind, new_data)
# Smooth gene expression
Diff.curve_matrix <- genSmoothCurves(gbm_cds[as.character(pseudo.maturation.diff.filtered$gene_short_name), KO.pData$Barcode],
trend_formula = "~sm.ns(Pseudotime, df = 3)*Lineage",
relative_expr = TRUE,
new_data = new_data,
cores= parallel::detectCores() - 2)# Extract matrix containing smoothed curves for each lineages
Pal_curve_matrix <- Diff.curve_matrix[, 1:nPoints] #Pallial points
CR_curve_matrix <- Diff.curve_matrix[, (nPoints + 1):(2 * nPoints)] #CR points
# Direction of the comparison : postive ABCs <=> Upregulated in CR lineage
ABCs_res <- CR_curve_matrix - Pal_curve_matrix
# Average logFC between the 2 curves
ILR_res <- log2(CR_curve_matrix/ (Pal_curve_matrix + 0.1))
ABCs_res <- apply(ABCs_res, 1, function(x, nPoints) {
avg_delta_x <- (x[1:(nPoints - 1)] + x[2:(nPoints)])/2
step <- (100/(nPoints - 1))
res <- round(sum(avg_delta_x * step), 3)
return(res)},
nPoints = nPoints) # Compute the area below the curve
ABCs_res <- cbind(ABCs_res, ILR_res[,ncol(ILR_res)])
colnames(ABCs_res)<- c("ABCs", "Endpoint_ILR")
# Import ABC values into the DE test results table
pseudo.maturation.diff.filtered <- cbind(pseudo.maturation.diff.filtered[,1:4],
ABCs_res,
pseudo.maturation.diff.filtered[,5:6])# Extract Cajal-Retzius expressed genes
CR.res <- as.data.frame(pseudo.maturation.diff.filtered[pseudo.maturation.diff.filtered$ABCs > 0,])
CR.genes <- row.names(CR.res)
CR_curve_matrix <- CR_curve_matrix[CR.genes, ]Pseudotime.genes.clusters <- cluster::pam(as.dist((1 - cor(Matrix::t(CR_curve_matrix),method = "pearson"))), k= 5)
CR.Gene.dynamique <- data.frame(Gene= names(Pseudotime.genes.clusters$clustering),
Waves= Pseudotime.genes.clusters$clustering,
Gene.Clusters = Pseudotime.genes.clusters$clustering,
q.val = CR.res$qval,
ABCs= CR.res$ABCs
) %>% arrange(Gene.Clusters)
row.names(CR.Gene.dynamique) <- CR.Gene.dynamique$Gene
CR.Gene.dynamique$Gene.Clusters <- paste0("Clust.", CR.Gene.dynamique$Gene.Clusters)
write.table(CR.Gene.dynamique, "KO_CR_dynamic_genes.csv", sep = ";", quote = F, row.names = F)# Order the rows using seriation
dst <- as.dist((1-cor(scale(t(CR_curve_matrix)), method = "pearson")))
row.ser <- seriation::seriate(dst, method ="R2E") #"R2E" #TSP #"GW" "GW_ward"
gene.order <- rownames(CR_curve_matrix[get_order(row.ser),])
# Set annotation colors
pal <- wes_palette("Darjeeling1")
anno.colors <- list(lineage = c(Pallial_neurons="#026c9a", Cajal_Retzius="#cc391b"),
Gene.Clusters = c(Clust.1 =pal[1] , Clust.2=pal[2], Clust.3=pal[3], Clust.4=pal[4], Clust.5=pal[5]))
pheatmap::pheatmap(Diff.curve_matrix[gene.order,
c(300:1,#Pal
301:600)], #CR
scale = "row",
cluster_rows = F,
cluster_cols = F,
annotation_row = CR.Gene.dynamique %>% dplyr::select(Gene.Clusters),
annotation_col = data.frame(lineage = rep(c("Pallial_neurons","Cajal_Retzius"), each=300)),
annotation_colors = anno.colors,
show_colnames = F,
show_rownames = F,
fontsize_row = 8,
color = viridis::viridis(9),
breaks = seq(-2.5,2.5, length.out = 9),
main = "") ## KO Pallial neurons trajectory analysis
# Extract Pallial neurons trajectory genes
Pal.res <- as.data.frame(pseudo.maturation.diff.filtered[pseudo.maturation.diff.filtered$ABCs < 0,])
Pal.genes <- row.names(Pal.res)
Pal_curve_matrix <- Pal_curve_matrix[Pal.genes, ]## Cluster gene by expression profiles
Pseudotime.genes.clusters <- cluster::pam(as.dist((1 - cor(Matrix::t(Pal_curve_matrix),method = "pearson"))), k= 5)
Pal.Gene.dynamique <- data.frame(Gene= names(Pseudotime.genes.clusters$clustering),
Waves= Pseudotime.genes.clusters$clustering,
Gene.Clusters = Pseudotime.genes.clusters$clustering,
q.val = Pal.res$pval,
ABCs= Pal.res$ABCs
) %>% arrange(Gene.Clusters)
row.names(Pal.Gene.dynamique) <- Pal.Gene.dynamique$Gene
Pal.Gene.dynamique$Gene.Clusters <- paste0("Clust.", Pal.Gene.dynamique$Gene.Clusters)# Order the rows using seriation
dst <- as.dist((1-cor(scale(t(Pal_curve_matrix)), method = "pearson")))
row.ser <- seriation::seriate(dst, method ="R2E") #"R2E" #TSP #"GW" "GW_ward"
gene.order <- rownames(Pal_curve_matrix[get_order(row.ser),])
# Set annotation colors
pal <- wes_palette("Darjeeling1")
anno.colors <- list(lineage = c(Pallial_neurons="#026c9a", Cajal_Retzius="#cc391b"),
Gene.Clusters = c(Clust.1 =pal[1] , Clust.2=pal[2], Clust.3=pal[3], Clust.4=pal[4], Clust.5=pal[5]))
pheatmap::pheatmap(Diff.curve_matrix[gene.order,
c(300:1,#Pal
301:600)], #CR
scale = "row",
cluster_rows = F,
cluster_cols = F,
annotation_row = Pal.Gene.dynamique %>% dplyr::select(Gene.Clusters),
annotation_col = data.frame(lineage = rep(c("Pallial_neurons","Cajal_Retzius"), each=300)),
annotation_colors = anno.colors,
show_colnames = F,
show_rownames = F,
fontsize_row = 8,
color = viridis::viridis(9),
breaks = seq(-2.5,2.5, length.out = 9),
main = "")#date
format(Sys.time(), "%d %B, %Y, %H,%M")## [1] "13 juin, 2022, 15,48"
#Packages used
sessionInfo()## R version 4.2.0 (2022-04-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=fr_FR.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fr_FR.UTF-8 LC_COLLATE=fr_FR.UTF-8
## [5] LC_MONETARY=fr_FR.UTF-8 LC_MESSAGES=fr_FR.UTF-8
## [7] LC_PAPER=fr_FR.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] splines stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] wesanderson_0.3.6 cowplot_1.1.1 ggExtra_0.9
## [4] RColorBrewer_1.1-2 dplyr_1.0.7 seriation_1.3.1
## [7] gprofiler2_0.2.1 monocle_2.22.0 DDRTree_0.1.5
## [10] irlba_2.3.3 VGAM_1.1-5 ggplot2_3.3.5
## [13] Biobase_2.54.0 BiocGenerics_0.40.0 Matrix_1.4-1
## [16] Revelio_0.1.0 princurve_2.1.6 SeuratObject_4.0.4
## [19] Seurat_4.0.5
##
## loaded via a namespace (and not attached):
## [1] plyr_1.8.6 igraph_1.2.11 lazyeval_0.2.2
## [4] densityClust_0.3 listenv_0.8.0 scattermore_0.7
## [7] fastICA_1.2-3 digest_0.6.29 foreach_1.5.1
## [10] htmltools_0.5.2 viridis_0.6.2 fansi_0.5.0
## [13] magrittr_2.0.2 tensor_1.5 cluster_2.1.3
## [16] ROCR_1.0-11 limma_3.50.0 globals_0.14.0
## [19] matrixStats_0.61.0 docopt_0.7.1 spatstat.sparse_2.0-0
## [22] colorspace_2.0-2 ggrepel_0.9.1 xfun_0.28
## [25] sparsesvd_0.2 crayon_1.4.2 jsonlite_1.7.2
## [28] spatstat.data_2.1-0 survival_3.2-13 zoo_1.8-9
## [31] iterators_1.0.13 glue_1.5.1 polyclip_1.10-0
## [34] registry_0.5-1 gtable_0.3.0 leiden_0.3.9
## [37] future.apply_1.8.1 abind_1.4-5 scales_1.1.1
## [40] pheatmap_1.0.12 DBI_1.1.1 miniUI_0.1.1.1
## [43] Rcpp_1.0.8 viridisLite_0.4.0 xtable_1.8-4
## [46] reticulate_1.22 spatstat.core_2.3-1 htmlwidgets_1.5.4
## [49] httr_1.4.2 FNN_1.1.3 ellipsis_0.3.2
## [52] ica_1.0-2 farver_2.1.0 pkgconfig_2.0.3
## [55] sass_0.4.0 uwot_0.1.10 deldir_1.0-6
## [58] utf8_1.2.2 labeling_0.4.2 tidyselect_1.1.1
## [61] rlang_0.4.12 reshape2_1.4.4 later_1.3.0
## [64] munsell_0.5.0 tools_4.2.0 generics_0.1.1
## [67] ggridges_0.5.3 evaluate_0.14 stringr_1.4.0
## [70] fastmap_1.1.0 yaml_2.2.1 goftest_1.2-3
## [73] knitr_1.36 fitdistrplus_1.1-6 purrr_0.3.4
## [76] RANN_2.6.1 pbapply_1.5-0 future_1.23.0
## [79] nlme_3.1-153 mime_0.12 slam_0.1-49
## [82] compiler_4.2.0 plotly_4.10.0 png_0.1-7
## [85] spatstat.utils_2.2-0 tibble_3.1.6 bslib_0.3.1
## [88] stringi_1.7.6 highr_0.9 lattice_0.20-45
## [91] HSMMSingleCell_1.14.0 vctrs_0.3.8 pillar_1.6.4
## [94] lifecycle_1.0.1 spatstat.geom_2.3-0 combinat_0.0-8
## [97] lmtest_0.9-39 jquerylib_0.1.4 RcppAnnoy_0.0.19
## [100] data.table_1.14.2 httpuv_1.6.3 patchwork_1.1.1
## [103] R6_2.5.1 promises_1.2.0.1 TSP_1.1-11
## [106] KernSmooth_2.23-20 gridExtra_2.3 parallelly_1.29.0
## [109] codetools_0.2-18 MASS_7.3-57 assertthat_0.2.1
## [112] withr_2.4.3 qlcMatrix_0.9.7 sctransform_0.3.2
## [115] mgcv_1.8-40 parallel_4.2.0 grid_4.2.0
## [118] rpart_4.1.16 tidyr_1.1.4 rmarkdown_2.11
## [121] Rtsne_0.15 shiny_1.7.1
Institute of Psychiatry and Neuroscience of Paris, INSERM U1266, 75014, Paris, France, matthieu.moreau@inserm.fr↩︎